Libraries Session
%tensorflow_version 2.x
import tensorflow
tensorflow.__version__
from google.colab import drive
drive.mount('/content/drive')
import random
random.seed(0)
# Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization
from tensorflow.keras.layers import UpSampling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras import backend as K
from PIL import Image
from numpy import asarray
A.Import and read ‘images.npy’.
# Load the annotated dataset: an object array where each row holds
# [image, list-of-face-annotations] (hence allow_pickle=True).
data = np.load('/content/drive/MyDrive/images(1).npy', allow_pickle=True)
data.shape
The file contains 409 images and labels. Let's view a few images and their labels.
data[0][0]
data[408][1]
B.Split the data into Features(X) & labels(Y). Unify shape of all the images
from tensorflow.keras.applications.mobilenet import preprocess_input

IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
HEIGHT_CELLS = 28
WIDTH_CELLS = 28
IMAGE_SIZE = 224

# Feature tensor (N, H, W, 3) and binary face masks (N, H, W)
masks = np.zeros((int(data.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH))
X = np.zeros((int(data.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH, 3))
for index in range(data.shape[0]):
    img = data[index][0]
    img = cv2.resize(img, dsize=(IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_CUBIC)
    # Keep only the first 3 channels (drops any alpha channel). A grayscale
    # image has no channel axis, so the slice raises IndexError; convert it
    # to 3-channel RGB instead.
    try:
        img = img[:, :, :3]
    except IndexError:
        print(f"Exception {index} Grayscale image with shape {img.shape}")
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        # BUG FIX: the original `continue` here skipped the feature/mask
        # assignment below, leaving all-zero rows for grayscale samples even
        # though the image had just been converted; fall through instead.
    X[index] = preprocess_input(np.array(img, dtype=np.float32))
    # Build the target mask from the normalized face-box coordinates.
    for i in data[index][1]:
        x1 = int(i['points'][0]['x'] * IMAGE_WIDTH)
        x2 = int(i['points'][1]['x'] * IMAGE_WIDTH)
        y1 = int(i['points'][0]['y'] * IMAGE_HEIGHT)
        y2 = int(i['points'][1]['y'] * IMAGE_HEIGHT)
        # Pixels inside the face box are foreground (1), everything else 0.
        masks[index][y1:y2, x1:x2] = 1
print(f"### Shape of X is '{X.shape}' and the shape of mask is '{masks.shape}' ")
C. Split the data into train and test sets (approximately 400:9).
from sklearn.model_selection import train_test_split
# First split: 80% train / 20% hold-out. Second split carves the hold-out
# into validation (80% of it) and test (20% of it).
X_train, X_test, y_train, y_test = train_test_split(X, masks, test_size=0.2)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.2)
print(f"Shape of X_train is '{X_train.shape}' and the shape of y_train is '{y_train.shape}'")
print(f"Shape of X_val is '{X_val.shape}' and the shape of y_val is '{y_val.shape}'")
print(f"Shape of X_test is '{X_test.shape}' and the shape of y_test is '{y_test.shape}'")
D.Select random image from the train data and display original image and masked image.
def _show_row(samples, specs):
    """Show up to four samples in one figure row.

    specs is a list of (index, clim) pairs; clim is a (vmin, vmax) tuple
    applied via set_clim, or None to keep matplotlib's default scaling.
    """
    fig = plt.figure(figsize=(15, 15))
    for pos, (idx, clim) in enumerate(specs, start=1):
        fig.add_subplot(1, 4, pos)
        shown = plt.imshow(samples[idx])
        if clim is not None:
            shown.set_clim(*clim)

# Sample training images (already MobileNet-preprocessed, so colors look odd)
_show_row(X_train, [(0, None), (10, (0.0, 0.7)), (20, (0.0, 1.4)), (30, (0.0, 2.1))])
# The corresponding binary face masks
_show_row(y_train, [(0, None), (10, (0.0, 0.7)), (20, (0.0, 1.4)), (30, (0.0, 1.4))])
A.Design a face mask detection model.
IMAGE_SIZE = 224   # input resolution fed to MobileNetV2
EPOCHS = 15        # maximum training epochs (EarlyStopping may cut short)
BATCH = 8          # batch size used for training/evaluation
LR = 1e-4          # initial learning rate for the Nadam optimizer
def model():
    """Build a U-Net-style face-segmentation network.

    Encoder: MobileNetV2 (ImageNet weights, alpha=0.35) truncated at
    block_13_expand_relu (14x14 feature map). Decoder: four 2x upsampling
    stages, each concatenated with the matching encoder skip tensor, ending
    in a 1-channel sigmoid mask at the full 224x224 input resolution.

    NOTE(review): the global name `model` is later rebound to the returned
    instance (`model = model()`), so this function can only be called once.
    """
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")
    encoder = MobileNetV2(input_tensor=inputs, weights="imagenet", include_top=False, alpha=0.35)
    # Skip tensors at 224, 112, 56 and 28 pixels; consumed in reverse order.
    skip_connection_names = ["input_image", "block_1_expand_relu", "block_3_expand_relu", "block_6_expand_relu"]
    encoder_output = encoder.get_layer("block_13_expand_relu").output
    # Decoder filter counts, matched (reversed) to the skip list above.
    f = [16, 32, 48, 64]
    x = encoder_output
    for i in range(1, len(skip_connection_names)+1, 1):
        # Walk from the deepest skip (block_6, 28px) back to the input image.
        x_skip = encoder.get_layer(skip_connection_names[-i]).output
        x = UpSampling2D((2, 2))(x)
        x = Concatenate()([x, x_skip])
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    # Per-pixel face probability in [0, 1].
    x = Conv2D(1, (1, 1), padding="same")(x)
    x = Activation("sigmoid")(x)
    model = Model(inputs, x)
    return model
model = model()
model.summary()
B.Design your own Dice Coefficient and Loss function.
smooth = 1e-15

def dice_coef(y_true, y_pred):
    """Dice coefficient between predicted and true masks, smoothed so the
    ratio is well-defined when both masks are empty."""
    flat = tf.keras.layers.Flatten()
    truth, pred = flat(y_true), flat(y_pred)
    overlap = tf.reduce_sum(truth * pred)
    total = tf.reduce_sum(truth) + tf.reduce_sum(pred)
    return (2. * overlap + smooth) / (total + smooth)
def dice_loss(y_true, y_pred):
    """Segmentation loss: 1 - Dice coefficient (0 when masks fully overlap)."""
    similarity = dice_coef(y_true, y_pred)
    return 1.0 - similarity
Compiling the Model
# Compile with Dice loss; track Dice coefficient plus pixel-wise recall/precision.
opt = tf.keras.optimizers.Nadam(LR)
metrics = [dice_coef, Recall(), Precision()]
model.compile(loss=dice_loss, optimizer=opt, metrics=metrics)
callbacks = [
    # Shrink the LR 10x after 4 stagnant epochs; stop after 10 stagnant
    # epochs (keeps the FINAL weights, not the best — restore is False).
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)
]
C.Train and tune the model as required.
Training the Model
# Ceil-division batch counts (kept for reference; fit() now derives the
# number of steps itself from batch_size).
train_steps = len(X_train)//BATCH
valid_steps = len(X_val)//BATCH
if len(X_train) % BATCH != 0:
    train_steps += 1
if len(X_val) % BATCH != 0:
    valid_steps += 1
# BUG FIX: the original passed steps_per_epoch computed for BATCH=8 while
# fit() still batched the arrays with its default batch_size of 32, so the
# step counts did not match the actual batching. Passing batch_size=BATCH
# lets Keras iterate exactly ceil(len/BATCH) steps per epoch.
model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH,
    callbacks=callbacks
)
D.Evaluate and share insights on performance of the model.
# Ceil-division batch count (kept for reference only).
test_steps = (len(X_test)//BATCH)
if len(X_test) % BATCH != 0:
    test_steps += 1
# BUG FIX: evaluate with the intended batch size instead of passing steps
# computed for BATCH=8 while the default batch_size of 32 was in effect.
model.evaluate(X_test, y_test, batch_size=BATCH)
The model has precision and recall of 65% and 62% respectively. The loss is 54% and dice coefficient is 44%.
3.Test the model predictions on the test image: ‘image with index 3 in the test data’ and visualise the predicted masks on the faces in the image.
# Resize the chosen test sample to the model's input size (no-op here: both
# are already 224x224).
image = cv2.resize(X_test[3], (IMAGE_WIDTH, IMAGE_HEIGHT))
# NOTE(review): X_test was already passed through preprocess_input when X
# was built, so this applies MobileNet preprocessing twice — confirm intent.
feat_scaled = preprocess_input(np.array(image, dtype=np.float32))
feat_scaled
# Predict a per-pixel face-probability mask for the single-image batch.
y_pred = model.predict(np.array([feat_scaled]))
y_pred
# Threshold at 0.5 to obtain a binary mask.
pred_mask = cv2.resize((1.0*(y_pred[0]>0.5)), (IMAGE_WIDTH,IMAGE_HEIGHT))
pred_mask
from google.colab.patches import cv2_imshow
# cv2_imshow((feat_scaled).astype(np.uint8))
cv2_imshow((feat_scaled).astype(np.uint8))
# plt.imshow((pred_mask))
plt.imshow((pred_mask).astype(np.uint16))
from tqdm.notebook import trange, tqdm
from IPython.display import Image, display, Markdown, clear_output
from zipfile import ZipFile
1.Read/import images from folder ‘training_images’.
project_path = '/content/drive/MyDrive/'
image_files = 'training_images-20211126T092819Z-001.zip'
images_zip_path = os.path.join(project_path, image_files)
# Extract the archive into the current working directory.
with ZipFile(images_zip_path, 'r') as z:
    z.extractall()
## Get the Unzipped Location in the drive
# Top-level folder of the first archive entry = extraction directory name
# (z.filelist remains readable after the with-block closes the file).
zip_dir_loc = z.filelist[0].filename.split("/")[0]
zip_dir_loc
raw_img_file_names = [os.path.join(zip_dir_loc,i) for i in os.listdir(zip_dir_loc)]
raw_img_file_names[:5]
#Reading the images
#Reading the images
img_list = []
for imgs in tqdm(raw_img_file_names):
    tst_img = cv2.imread(imgs)
    img_list.append(tst_img)
# Becomes an object array when image dimensions differ across files.
img_list = np.array(img_list)
display(Markdown(f"#### {img_list.shape}"))
# Viewing random images
from google.colab.patches import cv2_imshow
for i in img_list[:5,]:
    cv2_imshow(cv2.resize(i,(224,224)))
#Defining a function to create bounding boxes
def test_bb(df, fname, title=""):
    """Display image `fname` with every bounding box recorded for it in `df`.

    df must contain columns 'x', 'y', 'w', 'h' and 'Image_Name'; `title` is
    drawn near the top-right corner of each box.
    """
    tst_img = cv2.imread(fname)
    temp_df = df[df['Image_Name'] == fname]
    # Fixed: dropped the unused `rect_img` local and the redundant bare
    # return; box values are read via temp_df (same index labels as df).
    for rows in temp_df.index:
        x = temp_df['x'][rows]
        y = temp_df['y'][rows]
        w = temp_df['w'][rows]
        h = temp_df['h'][rows]
        cv2.rectangle(tst_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
        cv2.putText(tst_img, title, (int((x+w)*0.75), y-3), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    cv2_imshow(tst_img)
#Defining a function to read images and resize them
def show_face(img_list, scale=1.0):
    """Read each image path in `img_list`, rescale it by `scale`, and show it
    with its path as a markdown caption."""
    for path in img_list:
        picture = cv2.imread(path)
        new_w = int(picture.shape[1] * scale)
        new_h = int(picture.shape[0] * scale)
        picture = cv2.resize(picture, (new_w, new_h))
        display(Markdown(f"#### {path}"))
        cv2_imshow(picture)
#Downloading the HAAR Model
!wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
# Bounding-box records for the Haar detector: one row per image.
haar_img_box_df = pd.DataFrame(columns=['x','y','w','h','Total_Faces','Image_Name'])
haar_img_box_df
# Detecting Faces using HAAR Model
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
test_img = cv2.imread(raw_img_file_names[0])
# Haar cascades operate on single-channel images.
grey = cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)
# Detect faces (scaleFactor=1.1, minNeighbors=4)
faces = face_cascade.detectMultiScale(grey,1.1,4)
# Draw rectangle around the faces
for (x, y, w, h) in faces:
    cv2.rectangle(test_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    cv2.putText(test_img, "HaarCascadeClassifier", (int((x+w)*0.75),y-3),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255))
# Display the output (the printed box values come from the LAST detection).
display(Markdown(f"### Bounding Box parameters are `x`:{x}, `y`:{y}, `width`:{w}, `height`:{h}"))
cv2_imshow(test_img)
# Detecting faces for all the images
%%time
# Run the Haar detector over every image, recording one row per image.
haar_undetected_images = []
haar_detected_images = []
for imgs, fnames in tqdm(zip(img_list,raw_img_file_names)):
    gray = cv2.cvtColor(imgs,cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray,1.1,4)
    if len(faces) == 0:
        haar_undetected_images.append(fnames)
        # Sentinel row: w = h = -1 marks "no face found".
        temp_dict = {'x':0,
                     'y':0,
                     'w':-1,
                     'h':-1,
                     'Total_Faces':0,
                     'Image_Name':fnames}
    else:
        haar_detected_images.append(fnames)
        for (x,y,w,h) in faces:
            temp_dict = {'x':x,
                         'y':y,
                         'w':w,
                         'h':h,
                         'Total_Faces':len(faces),
                         'Image_Name':fnames}
    # NOTE(review): appended once per image, so only the final box of a
    # multi-face image is stored (Total_Faces still records the count) —
    # confirm whether this was intended.
    haar_img_box_df = haar_img_box_df.append(temp_dict,ignore_index=True)
display(Markdown(f"#### Detected faces for {len(haar_detected_images)} images"))
display(Markdown(f"#### Failed to detect faces for {len(haar_undetected_images)} images"))
haar_img_box_df
haar_img_box_df[haar_img_box_df['Total_Faces'] > 1]
# Viewing samples of correctly and incorrectly detected faces using the HAAR Model
display(Markdown("### (1) Correctly detected 1 face"))
test_bb(haar_img_box_df,"training_images/real_00115.jpg",title="Haar")
display(Markdown("### (2) Incorrectly detected multiple faces"))
test_bb(haar_img_box_df,"training_images/real_00730.jpg",title="Haar")
# Incorrectly detected faces using the HAAR Model
show_face(haar_undetected_images[-5:],scale=0.4)
# Downloading the MTCNN model to detect faces
!pip install mtcnn
from mtcnn.mtcnn import MTCNN
# MTCNN detector with default pretrained weights.
mtcnn_det = MTCNN()
# Detecting faces using the MTCNN Model (expects RGB; cv2 loads BGR).
mtcnn_tst_img = cv2.imread(raw_img_file_names[0])
mt_cvt = cv2.cvtColor(mtcnn_tst_img,cv2.COLOR_BGR2RGB)
mt_faces = mtcnn_det.detect_faces(mt_cvt)
# Draw each detection's box on the original BGR image.
for face in mt_faces:
    mt_x, mt_y,mt_w,mt_h = face['box']
    cv2.rectangle(mtcnn_tst_img,(mt_x,mt_y),(mt_x + mt_w,mt_y + mt_h),(255,0,0),2)
    cv2.putText(mtcnn_tst_img, "MTCNN", (int((mt_x+mt_w)*1),mt_y-3),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255))
cv2_imshow(mtcnn_tst_img)
# One row per image, mirroring the Haar results table.
mtcnn_img_box_df = pd.DataFrame(columns=['x','y','w','h','Total_Faces','Image_Name'])
mtcnn_img_box_df
%%time
# Same sweep as the Haar loop, using MTCNN on RGB-converted images.
mtcnn_undetected_images = []
mtcnn_detected_images = []
for imgs, fnames in tqdm(zip(img_list,raw_img_file_names)):
    cvt_img = cv2.cvtColor(imgs,cv2.COLOR_BGR2RGB)
    faces = mtcnn_det.detect_faces(cvt_img)
    if len(faces) == 0:
        mtcnn_undetected_images.append(fnames)
        # Sentinel row marking "no face found".
        temp_dict = {'x':0,
                     'y':0,
                     'w':-1,
                     'h':-1,
                     'Total_Faces':0,
                     'Image_Name':fnames}
    else:
        mtcnn_detected_images.append(fnames)
        for face in faces:
            temp_dict = {'x':face['box'][0],
                         'y':face['box'][1],
                         'w':face['box'][2],
                         'h':face['box'][3],
                         'Total_Faces':len(faces),
                         'Image_Name':fnames}
    # NOTE(review): one append per image — only the last detection of a
    # multi-face image is stored; confirm intent.
    mtcnn_img_box_df = mtcnn_img_box_df.append(temp_dict,ignore_index=True)
display(Markdown(f"#### Detected faces for {len(mtcnn_detected_images)} images"))
display(Markdown(f"#### Failed to detect faces for {len(mtcnn_undetected_images)} images"))
mtcnn_img_box_df
display(mtcnn_img_box_df[mtcnn_img_box_df['Total_Faces'] > 1])
display(Markdown(f"#### Number of images with more than 1 face detected : {len(mtcnn_img_box_df[mtcnn_img_box_df['Total_Faces'] > 1])}"))
display(Markdown("### (1) Correctly detected 1 face"))
test_bb(mtcnn_img_box_df,"training_images/real_00115.jpg",title="MTCNN")
display(Markdown("### (2) Correctly detected one face and incorrectly the other one"))
test_bb(mtcnn_img_box_df,"training_images/real_00699.jpg",title="MTCNN")
show_face(mtcnn_undetected_images,scale=0.4)
Observation:
There are about 5 images where a face could not be detected due to:
The MTCNN has detected face in images where,
# Displaying all the faces that were not detected by either HAAR or MTCNN Model
haar_set = set(haar_undetected_images)
mtcnn_set = set(mtcnn_undetected_images)
# Intersection = images missed by BOTH detectors.
show_face(haar_set.intersection(mtcnn_set),0.4)
Observation:
1.Unzip, read and Load data(‘PINS.zip’) into session. [2 Marks]
2.Write function to create metadata of the image. [4 Marks]Hint: Metadata means derived information from the available data which can be useful for particular problem statement.
3.Write a loop to iterate through each and every image and create metadata for all the images. [4 Marks]
4.Generate Embeddings vectors on the each face in the dataset. [4 Marks]Hint: Use ‘vgg_face_weights.h5’
5.Build distance metrics for identifying the distance between two similar and dissimilar images. [4 Marks]
6.Use PCA for dimensionality reduction. [2 Marks]
7.Build an SVM classifier in order to map each image to its right person. [4 Marks]
8.Import and display the the test images. [2 Marks]Hint: ‘Benedict Cumberbatch9.jpg’ and ‘Dwayne Johnson4.jpg’ are the test images.
9.Use the trained SVM model to predict the face on both test images. [4 Marks]
cd '/content/drive/MyDrive/'
project_path = '/content/drive/MyDrive/'
# from zipfile import ZipFile
# # specifying the zip file name
# file_name = project_path + "PINS.zip"
# # opening the zip file in READ mode
# with ZipFile(file_name, 'r') as zip:
# # printing all the contents of the zip file
# # zip.printdir()
# # extracting all the files
# print('Extracting all the files now...')
# zip.extractall()
# print('Done!')
directory = '/content/drive/MyDrive/PINS'
# Normalize class-folder names: spaces -> underscores, uppercase, and strip
# any "PINS_" prefix the download added.
for filename in os.listdir(directory):
    os.rename(os.path.join(directory,filename),os.path.join(directory, filename.replace(' ', '_').upper()))
    # BUG FIX: the variable was named `str`, shadowing the builtin.
    prefix = "PINS_"
    # NOTE(review): `filename` is the PRE-rename name here, so this second
    # rename only works for folders the first rename left untouched —
    # confirm against the actual folder naming in the archive.
    if prefix in filename:
        filepath = os.path.join(directory, filename)
        newfilepath = os.path.join(directory, filename.replace(prefix," "))
        os.rename(filepath, newfilepath)
# Replace spaces in the individual image file names as well.
for subdir, dirs, files in os.walk(directory):
    for file in files:
        os.rename(os.path.join(subdir, file), os.path.join(subdir, file.replace(' ', '_')))
# Displaying all the labels
import glob
paths = glob.glob("PINS/*")
label_names = [os.path.split(x)[1] for x in paths]
print("Total labels: {}".format(len(label_names)))
print("Labels: {}".format((label_names)))
# Saving the metadata in a dataframe
# Saving the metadata in a dataframe: one row per folder under `directory`.
df = pd.DataFrame(columns=['Person', 'No. of Images','File Path', 'Files'])
folders = [x[0] for x in os.walk(directory)]
for subfolders in folders:
    fileList = os.listdir(subfolders)
    filePath = os.path.abspath(subfolders)
    # Strip the base-directory prefix (the leading path separator remains).
    person = (subfolders)[len(directory):]
    # NOTE(review): DataFrame.append is deprecated (removed in pandas 2.x).
    df = df.append({'Person': person, 'No. of Images': len(fileList), 'File Path': filePath , 'Files': fileList}, ignore_index=True)
df
import glob
from mpl_toolkits.axes_grid1 import ImageGrid
# BUG FIX: the original also did `from glob import glob`, re-binding the
# name to the function so the `glob.glob(...)` call below raised
# AttributeError; import the module only.
fig = plt.figure(1, (300, 100))
grid = ImageGrid(fig, 111, nrows_ncols=(5, 5), axes_pad=0.1)
n_img_class = []   # collected [label, image-count] pairs
counter = 0
for idx, label in enumerate(label_names):
    paths = glob.glob(os.path.join("PINS/", label, "*.jpg"))
    n_img_class.append([label, len(paths)])
    # Pick 10 random images of this person (with replacement).
    perm = np.random.choice(len(paths), size=10)
    title = True
    for ii in perm:
        ax = grid[counter]
        if title:
            # Annotate the first tile shown for this label.
            ax.text(1800, 105, label, verticalalignment="center")
            title = False
        img = cv2.imread(paths[ii])
        img = cv2.resize(img, (150, 150))
        ax.axis("off")
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        counter += 1
plt.show()
# Creating the features and target sets
#save the path into variable to use it easily next times.
#save the path into variable to use it easily next times.
TRAIN_FOLDER= "/content/drive/MyDrive/PINS"
from glob import glob
images = []
classes=[]
missing=0
# Walk each person folder and load every jpg; unreadable files are counted
# in `missing` and skipped.
for class_folder_name in os.listdir(TRAIN_FOLDER):
    class_folder_path = os.path.join(TRAIN_FOLDER, class_folder_name)
    class_label = class_folder_name
    for image_path in glob(os.path.join(class_folder_path, "*.jpg")):
        image_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image_bgr is None: # if the file contain any missing value ignore it
            missing += 1
            continue
        images.append(image_bgr)
        classes.append(class_label)
(classes)
len(images)
# Performing preprocessing for all the images
# resize image
def resize_images(img):
    """Cast an image to uint8 and resize it to 224x224 (bicubic)."""
    arr = np.array(img).astype(np.uint8)
    return cv2.resize(arr, (224, 224), interpolation=cv2.INTER_CUBIC)
#save resized images into images.
images = [resize_images(img) for img in images]
images[0].shape
# Displaying the shapes of images and classes
#see number of images in each label
# All images are now 224x224x3, so np.array yields a dense 4-D tensor.
images = np.array(images)
classes = np.array(classes)
print("images shape: ", images.shape)
print("classes shape: ", classes.shape)
#Viewing the distribution of number of images in each class
#Viewing the distribution of number of images in each class
import matplotlib as mp
values =[]
labels=[]
for label in set(classes):
    values.append(len(images[classes == label]))
    labels.append(label)
# NOTE(review): values are sorted in place but `labels` is NOT re-ordered
# to match, so bar heights no longer correspond to their x labels — confirm.
values.sort()
code=np.arange(0,13,1)
data_normalizer = mp.colors.Normalize()
# Custom gradient colormap for the bars.
color_map = mp.colors.LinearSegmentedColormap(
    "my_map",
    {
        "red": [(0, 1.0, 1.0),
                (1.0, .5, .5)],
        "green": [(0, 0.5, 0.5),
                  (1.0, 0, 0)],
        "blue": [(0, 0.50, 0.5),
                 (1.0, 0, 0)]
    }
)
fig, ax = plt.subplots(figsize=(150,50))
ax.bar(labels, values, color=color_map(data_normalizer(code)))
plt.xticks(fontsize = 8)
plt.yticks(fontsize = 10)
plt.xlabel("Species", fontsize = 14)
plt.ylabel("Number of images", fontsize = 14)
plt.title("Distribution of images in each class", fontsize=14)
plt.show()
class IdentityMetadata():
    """Location of a single dataset image: base dir / identity / file name."""

    def __init__(self, base, name, file):
        self.base = base   # dataset base directory
        self.name = name   # identity (person) sub-directory
        self.file = file   # image file name

    def image_path(self):
        """Full path assembled from base, identity and file name."""
        return os.path.join(self.base, self.name, self.file)

    def __repr__(self):
        # An instance prints as its full image path.
        return self.image_path()
def load_metadata(path):
    """Collect an IdentityMetadata entry for every file under path/<identity>/."""
    entries = [
        IdentityMetadata(path, identity, fname)
        for identity in os.listdir(path)
        for fname in os.listdir(os.path.join(path, identity))
    ]
    return np.array(entries)
metadata = load_metadata(directory)
! pip install git+https://github.com/rcmalli/keras-vggface.git
!pip install keras_applications --no-deps
# Patch the installed keras_vggface package for TF2: its models.py still
# imports the long-removed `keras.engine.topology`; rewrite the file
# in-place to import from tensorflow.keras.utils instead.
filename = "/usr/local/lib/python3.7/dist-packages/keras_vggface/models.py"
text = open(filename).read()
open(filename, "w+").write(text.replace('keras.engine.topology', 'tensorflow.keras.utils'))
import tensorflow as tf
from keras_vggface.vggface import VGGFace
from numpy import expand_dims
from keras_vggface.utils import preprocess_input
# Detecting Faces for Face Recognition
!pip install mtcnn
# confirm mtcnn was installed correctly
import mtcnn
# print version
print(mtcnn.__version__)
# The function extract_face_from_image() extracts all faces from an image:
# extract a single face from a given photograph
def extract_face(filename, required_size=(224, 224)):
    """Return the first face detected in `filename`, resized to `required_size`.

    Raises IndexError if no face is detected.
    """
    # load image from file
    pixels = cv2.imread(filename)
    # BUG FIX: only `import mtcnn` has run at this point, so the original
    # `mtcnn()` called the MODULE and raised TypeError; instantiate the
    # detector class instead.
    detector = mtcnn.MTCNN()
    # detect faces in the image
    results = detector.detect_faces(pixels)
    # bounding box of the first (highest-confidence) detection
    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height
    # crop the face region
    face = pixels[y1:y2, x1:x2]
    # resize with PIL to the model's expected input size
    image = Image.fromarray(face)
    image = image.resize(required_size)
    face_array = asarray(image)
    return face_array
pip install keras_vggface
# check version of keras_vggface
import keras_vggface
# print version
print(keras_vggface.__version__)
# Loading the pretrained vggface weights
weights_file = '/content/drive/MyDrive/vgg_face_weights.h5'
#import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import ZeroPadding2D,Convolution2D,MaxPooling2D
from tensorflow.keras.layers import Dense,Dropout,Softmax,Flatten,Activation,BatchNormalization
from tensorflow.keras.preprocessing.image import load_img,img_to_array
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import tensorflow.keras.backend as K
# Define VGG_FACE_MODEL architecture: a VGG16-style convolutional backbone
# ending in a 2622-way softmax over face identities.
# NOTE(review): this rebinds the global `model`, replacing the earlier
# segmentation model.
model = Sequential()
# Block 1: 2 x conv64 + maxpool
model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
# Block 2: 2 x conv128 + maxpool
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
# Block 3: 3 x conv256 + maxpool
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
# Block 4: 3 x conv512 + maxpool
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
# Block 5: 3 x conv512 + maxpool
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))
# "Fully connected" layers implemented as convolutions (7x7 then 1x1).
model.add(Convolution2D(4096, (7, 7), activation='relu'))
model.add(Dropout(0.5))
model.add(Convolution2D(4096, (1, 1), activation='relu'))
model.add(Dropout(0.5))
model.add(Convolution2D(2622, (1, 1)))
model.add(Flatten())
model.add(Activation('softmax'))
# Load VGG Face model weights
model.load_weights(weights_file)
# Remove last Softmax layer and get model upto last flatten layer #with outputs 2622 units
vgg_face_descriptor=Model(inputs=model.layers[0].input,outputs=model.layers[-2].output)
cd '/content/drive/MyDrive/PINS/_PETER_DINKLAGE'
# Viewing a sample image
sample_pic = '/content/drive/MyDrive/PINS/_PETER_DINKLAGE/Peter_Dinklage100_142.jpg'
# load image from file
image = plt.imread(sample_pic)
plt.imshow(image)
Next, initialize an MTCNN() object into the detector variable and use the .detect_faces() method to detect the faces in an image. Let’s see what it returns:
# Silence TF1-compat deprecation chatter emitted by MTCNN internals.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from mtcnn.mtcnn import MTCNN
# create the detector, using default weights
detector = MTCNN()
# detect faces in the image
faces = detector.detect_faces(image)
# Each detection is a dict with 'box', 'confidence' and 'keypoints'.
for face in faces:
    print(face)
For every face, a Python dictionary is returned, which contains three keys. The box key contains the boundary of the face within the image. It has four values: x- and y- coordinates of the top left vertex, width, and height of the rectangle containing the face. The other keys are confidence and keypoints. The keypoints key contains a dictionary containing the features of a face that were detected, along with their coordinates:
Highlighting Faces in an Image
Now that we’ve successfully detected a face, let’s draw a rectangle over it to highlight the face within the image to verify if the detection was correct.
To draw a rectangle, import the Rectangle object from matplotlib.patches:
from matplotlib.patches import Rectangle
Let’s define a function highlight_faces to first display the image and then draw rectangles over faces that were detected. First, read the image through imread() and plot it through imshow(). For each face that was detected, draw a rectangle using the Rectangle() class.
Finally, display the image and the rectangles using the .show() method. If you’re using Jupyter notebooks, you may use the %matplotlib inline magic command to show plots inline:
def highlight_faces(image_path, faces):
    """Show the image at `image_path` with a red rectangle around each
    detection in `faces` (MTCNN-style dicts with a 'box' entry)."""
    canvas = plt.imread(image_path)
    plt.imshow(canvas)
    axes = plt.gca()
    for detection in faces:
        left, top, box_w, box_h = detection['box']
        outline = Rectangle((left, top), box_w, box_h, fill=False, color='red')
        axes.add_patch(outline)
    plt.show()
highlight_faces('/content/drive/MyDrive/PINS/_PETER_DINKLAGE/Peter_Dinklage105_176.jpg', faces)
cd '/content/drive/MyDrive/PINS/_MORGAN_FREEMAN'
image = plt.imread('/content/drive/MyDrive/PINS/_MORGAN_FREEMAN/Morgan_Freeman116_580.jpg')
faces = detector.detect_faces(image)
highlight_faces('/content/drive/MyDrive/PINS/_MORGAN_FREEMAN/Morgan_Freeman116_580.jpg', faces)
In these two images, you can see that the MTCNN algorithm correctly detects faces. Let’s now extract this face from the image to perform further analysis on it.
Extracting Face for Further Analysis
At this point, you know the coordinates of the faces from the detector. Extracting the faces is a fairly easy task using list indices. However, the VGGFace2 algorithm that we use needs the faces to be resized to 224 x 224 pixels. We’ll use the PIL library to resize the images.
# The function extract_face_from_image() extracts all faces from an image:
# extract a single face from a given photograph
def extract_face(filename, required_size=(224, 224)):
    """Detect the first face in an image file and return it as an array
    resized to `required_size` (raises IndexError when nothing is found)."""
    frame = cv2.imread(filename)
    # Fresh detector with default pretrained weights.
    found = MTCNN().detect_faces(frame)
    # Bounding box of the first detection.
    left, top, box_w, box_h = found[0]['box']
    crop = frame[top:top + box_h, left:left + box_w]
    # PIL handles the resize to the model's expected input size.
    resized = Image.fromarray(crop).resize(required_size)
    return asarray(resized)
cd '/content/drive/MyDrive/PINS/_ELIZA_TAYLOR'
# import Image, ImageTk
# load the photo and extract the face
pixels = extract_face('/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor101.jpg')
# plot the extracted face
plt.imshow(pixels)
# show the plot
plt.show()
pip install keras_vggface
pip install keras_applications
# Performing image preprocessing before fed into the VGG Face model
from numpy import expand_dims
from keras_vggface.utils import preprocess_input
# load the photo and extract the face
pixels = extract_face('/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor101.jpg')
# convert one face into samples
pixels = pixels.astype('float32')
samples = expand_dims(pixels, axis=0)
# prepare the face for the model, e.g. center pixels
samples = preprocess_input(samples, version=2)
# Predicting the label of the sample image
from keras_vggface.utils import decode_predictions
# NOTE(review): vgg_face_descriptor outputs the pre-softmax flatten layer,
# while decode_predictions normally expects the 2622-way softmax output —
# confirm which model should be used here.
yhat = vgg_face_descriptor.predict(samples)
# convert prediction into names
results = decode_predictions(yhat)
# display most likely results
for result in results[0]:
    print('%s: %.2f%%' % (result[0], result[1]))
The model predicted the image as Natalie_Portman, but with a very low confidence of only 19.3%.
Performing Face Verification With VGGFace2
A VGGFace2 model can be used for face verification.
This involves calculating a face embedding for a new given face and comparing the embedding to the embedding for the single example of the face known to the system.
A face embedding is a vector that represents the features extracted from the face. This can then be compared with the vectors generated for other faces. For example, another vector that is close (by some measure) may be the same person, whereas another vector that is far (by some measure) may be a different person.
Typical measures such as Euclidean distance and Cosine distance are calculated between two embeddings and faces are said to match or verify if the distance is below a predefined threshold, often tuned for a specific dataset or application.
First, we can load the VGGFace model without the classifier by setting the ‘include_top‘ argument to ‘False‘, specifying the shape of the output via the ‘input_shape‘ and setting ‘pooling‘ to ‘avg‘ so that the filter maps at the output end of the model are reduced to a vector using global average pooling.
def get_embeddings(filenames):
    """Return one VGGFace embedding row per image path in `filenames`."""
    cropped = [extract_face(f) for f in filenames]
    batch = asarray(cropped, 'float32')
    # Center pixel values the way the VGGFace network expects.
    batch = preprocess_input(batch, version=2)
    return vgg_face_descriptor.predict(batch)
We can take our photograph of Natalie Portman used previously (e.g. Natalie_Portman_46.jpg) as our definition of the identity of Natalie Portman by calculating and storing the face embedding for the face in that photograph.
We can then calculate embeddings for faces in other photographs of Natalie Portman and test whether we can effectively verify her identity. We can also use faces from photographs of other people to confirm that they are not verified as Natalie Portman.
Verification can be performed by calculating the Cosine distance between the embedding for the known identity and the embeddings of candidate faces. This can be achieved using the cosine() SciPy function. The maximum distance between two embeddings is a score of 1.0, whereas the minimum distance is 0.0. A common cut-off value used for face identity is between 0.4 and 0.6, such as 0.5, although this should be tuned for an application.
The is_match() function below implements this, calculating the distance between two embeddings and interpreting the result.
# Defining functions to measure the distance between two different images to find how identical they are
# determine if a candidate face is a match for a known face
def is_match(known_embedding, candidate_embedding, thresh=0.5):
    """Print whether two face embeddings belong to the same person.

    Uses cosine distance; a distance at or below `thresh` counts as a match.
    """
    # BUG FIX: `cosine` was never imported anywhere in this notebook, so the
    # call raised NameError; import it locally from SciPy.
    from scipy.spatial.distance import cosine
    score = cosine(known_embedding, candidate_embedding)
    if score <= thresh:
        print('>face is a Match (%.3f <= %.3f)' % (score, thresh))
    else:
        print('>face is NOT a Match (%.3f > %.3f)' % (score, thresh))
def L2Norm(H1, H2):
    """Euclidean (L2) distance between two embedding vectors.

    Only the first len(H1) entries of H2 are used, mirroring the original
    index-based loop's behavior for unequal lengths.
    """
    a = np.asarray(H1)
    b = np.asarray(H2)[:len(H1)]
    # Vectorized replacement for the original per-element Python loop.
    return np.sqrt(np.sum(np.square(a - b)))
# Encoding the labels
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
# Integer-encode the person names; `enc` is reused later to invert the
# SVM's numeric predictions back to names.
y = enc.fit_transform(classes)
num_classes = len(np.unique(y))
num_classes
Splitting data into training and testing set
training set : 70%
testing set: 30%
# 70/30 train-test split, then 25% of the train portion becomes validation.
X_train, X_test, y_train, y_test = train_test_split(images, y, test_size=0.3, random_state=50)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1)
print(f"#### Shapes of Train, Test and Validation sets are: {X_train.shape},{X_test.shape},{X_val.shape}")
print(f"#### Shapes of Train, Test and Validation sets of Target are: {y_train.shape},{y_test.shape},{y_val.shape}")
Classification using Supervised Learning Techniques
Support Vector Machines
The dataset has over 65,536 features for each image and only 3,336 images in the train dataset. To use an SVM, our model of choice, the number of features needs to be reduced.
PCA is a way of linearly transforming the data such that most of the information in the data is contained within a smaller number of features called components.
def flatten(X):
    """Split N HxWxC images into C flattened channel matrices of shape (N, H*W).

    Returns a 3-tuple in the images' stored channel order (loaded with
    cv2.imread, so presumably BGR — the r/g/b names used by callers are
    nominal). Replaces the original per-sample cv2.split loop with a single
    vectorized NumPy pass, removing the cv2 dependency for a pure array op.
    """
    arr = np.asarray(X)
    n = arr.shape[0]
    channels = arr.reshape(n, -1, arr.shape[-1])
    Xr = np.ascontiguousarray(channels[:, :, 0])
    Xg = np.ascontiguousarray(channels[:, :, 1])
    Xb = np.ascontiguousarray(channels[:, :, 2])
    return (Xr, Xg, Xb)
X_train_r, X_train_g, X_train_b = flatten(X_train)
X_test_r, X_test_g, X_test_b = flatten(X_test)
X_val_r, X_val_g, X_val_b = flatten(X_val)
X_train_r.shape, X_train_g.shape, X_train_b.shape,
X_test_r.shape, X_test_g.shape, X_test_b.shape,
X_val_r.shape, X_val_g.shape, X_val_b.shape
from sklearn.decomposition import PCA as RandomizedPCA

n_components = 500
# BUG FIX: the original fitted SEPARATE PCAs on the train, test and
# validation sets, so the three sets were projected into different,
# incomparable feature spaces and the test/val fits leaked held-out data.
# Fit on the training channels only and reuse those components everywhere.
X_train_pca_r = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_r)
X_train_pca_g = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_g)
X_train_pca_b = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_b)
# Project every set with the train-fitted transforms.
Xr_train_pca = X_train_pca_r.transform(X_train_r)
Xg_train_pca = X_train_pca_g.transform(X_train_g)
Xb_train_pca = X_train_pca_b.transform(X_train_b)
Xr_test_pca = X_train_pca_r.transform(X_test_r)
Xg_test_pca = X_train_pca_g.transform(X_test_g)
Xb_test_pca = X_train_pca_b.transform(X_test_b)
Xr_val_pca = X_train_pca_r.transform(X_val_r)
Xg_val_pca = X_train_pca_g.transform(X_val_g)
Xb_val_pca = X_train_pca_b.transform(X_val_b)
# Variance retained by the train-fitted components (per channel).
X_train_pca_r.explained_variance_ratio_.sum(), X_train_pca_g.explained_variance_ratio_.sum(), X_train_pca_b.explained_variance_ratio_.sum()
# Recombine the three channel projections into one feature matrix per set.
X_train_pca = np.concatenate([Xr_train_pca, Xg_train_pca, Xb_train_pca], axis=1)
X_test_pca = np.concatenate([Xr_test_pca, Xg_test_pca, Xb_test_pca], axis=1)
X_val_pca = np.concatenate([Xr_val_pca, Xg_val_pca, Xb_val_pca], axis=1)
X_train_pca.shape, y_train.shape,
X_test_pca.shape, y_test.shape,
X_val_pca.shape, y_val.shape
from sklearn.svm import SVC
from sklearn.model_selection import learning_curve, GridSearchCV
# Grid over linear and RBF kernels with a handful of C / gamma values.
param_grid = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
svc = SVC()
# Cross-validated grid search on the PCA-reduced training features.
clf = GridSearchCV(svc, param_grid, verbose=10, n_jobs=-1)
clf.fit(X_train_pca, y_train)
X_train.shape[0]
y_train.shape[0]
# Accuracy of the refit best estimator on validation and test sets.
svm_score = clf.score(X_val_pca, y_val)
from sklearn import svm, metrics
y_pred = clf.predict(X_test_pca)
svm_accuracy = metrics.accuracy_score(y_test, y_pred)
# Prediction using SVM
# Prediction using SVM
image = '/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor65.jpg'
img_pred = plt.imread(image)
img_pred_resize = cv2.resize(img_pred,(224,224), interpolation = cv2.INTER_CUBIC)
# The expand/squeeze round-trip leaves pred_img identical to img_pred_resize.
img_predict = np.expand_dims(img_pred_resize, axis=0)
pred_img = np.squeeze(img_predict, axis=0)
# Project each channel with the TRAIN-fitted PCAs, then recombine — the
# same pipeline the classifier was trained on.
X_pred_r, X_pred_g, X_pred_b = cv2.split(pred_img)
X_pred_pca_r = X_train_pca_r.transform(np.expand_dims(X_pred_r.flatten(), axis=0))
X_pred_pca_g = X_train_pca_g.transform(np.expand_dims(X_pred_g.flatten(), axis=0))
X_pred_pca_b = X_train_pca_b.transform(np.expand_dims(X_pred_b.flatten(), axis=0))
X_pred_pca = np.concatenate([X_pred_pca_r,X_pred_pca_g,X_pred_pca_b], axis=1)
pred_svm = clf.predict(X_pred_pca)[0]
print(f"The predicted label is:'{pred_svm}'")
# Map the numeric label back to the person's name.
example_identity = enc.inverse_transform(np.ravel(pred_svm))[0]
plt.imshow(img_pred_resize)
plt.title(f'Recognized as {example_identity}');
# NOTE(review): DataFrame.append is deprecated (removed in pandas 2.x).
model_performance = pd.DataFrame(columns=['Model', 'Accuracy', 'Predicted Person'])
model_performance = model_performance.append({'Model':'SVM',
                                              'Accuracy': svm_accuracy,
                                              'Predicted Person': example_identity
                                              }, ignore_index=True)
model_performance